<?php
//    require_once('class.BasicAnalyzer.php');
    //require_once('dig.all.php');
    /**
     * Extracts video information from tudou.com programme pages.
     *
     * The page HTML supplies the description, keywords, publish time and
     * author; the media URL and title come from a second request to the
     * tudou player endpoint, keyed by the "iid" found in the page script.
     */
    class TudouAnalyzer extends BasicAnalyzer implements Analyzer
    {
        /**
         * Patterns describing the pages this analyzer handles. The "url" entry
         * is a PCRE; presumably the dispatching code matches it against the
         * page URL (that code is not visible in this file).
         *
         * Dots are escaped so that only the literal host name matches.
         */
        public $features = array("url" => '/www\.tudou\.com\/programs\/view\//i');

        /**
         * Builds the video record for one tudou programme page.
         *
         * @param string $content HTML of the programme page (handled as GBK when
         *                        the description, keywords and author are decoded).
         * @param string $url     URL the page was fetched from; stored as the referer.
         *
         * @return array A list holding one associative array with the keys
         *               url, title, descr, referer, tags, category, ext,
         *               pub_date and author. An empty array is returned when
         *               the page has no iid or the player endpoint yields no
         *               media URL. Fields that cannot be found on the page are
         *               empty strings.
         */
        public function analyze($content, $url)
        {
            $content = (string) $content;

            // The player endpoint is keyed by the iid embedded in the page script.
            $iid = $this->tudouCapture('/var iid=(.*?);/', $content);
            if ($iid === '')
            {
                return array();
            }

            $req = new HTTP_Request("http://www.tudou.com/player/v.php");
            $req->setMethod(HTTP_REQUEST_METHOD_POST);
            $req->addPostData("id", $iid);
            $req->sendRequest();

            // A failed request leaves no usable body; without it there is no
            // media URL and therefore nothing worth returning.
            $xml = $req->getResponseBody();
            if (!is_string($xml) || $xml === '')
            {
                return array();
            }

            $vurl = $this->tudouCapture('/<f.*?>(.*?)<\/f>/i', $xml);
            if ($vurl === '')
            {
                return array();
            }
            $title = $this->tudouCapture("/q='(.*?)'/i", $xml);

            $descr = $this->tudouGbkToUtf8(
                $this->tudouCapture("/<meta name=\"Description\" content=\"([^\"]*)/i", $content)
            );

            // Keywords are a comma separated list; skip explode() when there are
            // none so that no empty-string tag is produced.
            $tags_str = $this->tudouGbkToUtf8(
                $this->tudouCapture("/<meta name=\"Keywords\" content=\"(.*?)\"/i", $content)
            );
            $tags = ($tags_str === '') ? array() : explode(",", $tags_str);
            $tags[] = "sr_tudou";

            // The page prints the date with dots; callers receive it with slashes.
            $pub_date = str_replace(
                ".",
                "/",
                $this->tudouCapture("/<li class=\"publishTime\">.*?: (.*?)<\/li>/i", $content)
            );

            // The author is the text of the first list item linking to a /home page.
            $author = $this->tudouGbkToUtf8(
                $this->tudouCapture(
                    "/<li[^>]*>[^<]*<a href=[\"']\/home[^>]*>([^<]*)<\/a>[^<]*<\/li>/i",
                    $content
                )
            );

            return array(
                array(
                    "url" => $vurl,
                    "title" => $title,
                    "descr" => $descr,
                    "referer" => $url,
                    "tags" => $tags,
                    "category" => "",
                    "ext" => "flv",
                    "pub_date" => $pub_date,
                    "author" => $author
                    )
                );
        }

        /**
         * Fetches the page at $url with getPage() and analyzes it.
         *
         * @param string $url Address of a tudou programme page.
         *
         * @return array Whatever analyze() returns for the fetched content.
         */
        public function test($url)
        {
            $content = $this->getPage($url);
            return $this->analyze($content, $url);
        }

        /**
         * Returns the first capture group of $pattern in $subject, or an empty
         * string when the pattern does not match.
         *
         * @param string $pattern PCRE containing at least one capture group.
         * @param string $subject Text to search.
         *
         * @return string
         */
        private function tudouCapture($pattern, $subject)
        {
            if (preg_match($pattern, (string) $subject, $matches) === 1 && isset($matches[1]))
            {
                return $matches[1];
            }
            return '';
        }

        /**
         * Converts GBK text to UTF-8, dropping bytes that cannot be converted.
         *
         * The //IGNORE suffix is only given on the target encoding, which is
         * where the PHP manual defines it.
         *
         * @param string $text GBK encoded text.
         *
         * @return string UTF-8 text, or an empty string when the input is empty
         *                or iconv() reports failure.
         */
        private function tudouGbkToUtf8($text)
        {
            if ($text === '')
            {
                return '';
            }
            $converted = iconv("GBK", "UTF-8//IGNORE", $text);
            return ($converted === false) ? '' : $converted;
        }
    }
    //$a = new TudouAnalyzer();
    //var_dump($a->test("http://www.tudou.com/programs/view/qFufmJuexPU/"));

?>
